
/* This file matches ASI districts to monthly precip. data using
Seema Jayachandran's Haversine formula method. It also generates
various rainfall shock measures */
* Matching done on code91, since our lat-lon data is by code91.

*Alternate filepath
/*
global do		"C:\Users\SSharma1\Desktop\factories\do"
global rain		"C:\Users\SSharma1\Desktop\factories\data\rainfall"
global data		"C:\Users\SSharma1\Desktop\factories\data"
*/

tempfile GRID DISTRICTS latlong GDISTRICTS rainwide

/* prep data */
use "$rain\rain.dta", clear

** Keep set of grid point coordinates
bys glon glat: keep if _n==1
keep glat glon
save `GRID'


use "$rain\census91_latlong.dta", clear
drop if code91 == 1 |code91 == .
sort code91
g dnumber = _n
sort dnumber
save `DISTRICTS'

* 468 unique code91s

/* Find closest grid point to each district's latitude and longitude */
use `GRID', clear

expand 468  /* finding closest grid point to 573 districts */
sort glat glon
bys glat glon: gen dnumber = _n
sort dnumber
merge dnumber using `DISTRICTS'
* Perfect merge
keep if _merge==3
drop _merge

* Haversine formula for calculating distance between 2 lat/lon points
#delim ;
gen a = (sin((3.14159/180)*(glat-lat)/2))^2 + 
(sin((3.14159/180)*(glon-lon)/2))^2 * cos((3.14159/180)*lat)*cos((3.14159/180)*glat);
#delim cr
gen distance = 6378*2*asin(min(1, sqrt(a)))  /*6378km is Earth's radius */
drop a

bys dnumber: egen min_dist = min(distance)
bys dnumber (distance): keep if _n==1
drop if distance ~= min_dist  /* This is an error check and should be superfluous if the line above worked */ 

keep code91 glat glon
sort code91
save `GRID', replace

use `DISTRICTS', clear
sort code91
merge code91 using `GRID'
tab _merge
* Perfect merge
drop if _merge~=3
drop _merge
save `GDISTRICTS'

* Merging with rain: 2 code91s can have the same glat glon, so merger is tricky


* This rainfall dataset has unique glat glon:
u "$rain\rain.dta", replace
reshape wide  jan feb mar apr may jun jul aug sept oct nov dec annual, i(glat glon) j(year)
sort glat glon
save `rainwide'

u `GDISTRICTS', clear
sort glat glon
merge glat glon using `rainwide'
* Perfect: All 468 code91s have merged with rain data
tab _merge
drop if _merge==2
drop _merge
drop dnumber
reshape long  jan feb mar apr may jun jul aug sept oct nov dec annual, i(code91) j(year)



* generate various rainfall shock measures */
bys code91: egen histmean = mean(annual)
bys code91: egen histsd = sd(annual)

* dummy: >30 percent deviation from historical mean
g shock30 = annual <= .7*histmean | annual >= 1.3*histmean

* dummy: > 50 percent dev from hist mean
g shock50 = annual <=.5*histmean | annual >= 1.5*histmean

* dummy: > 1 std deviation from hist mean
g shockdev = annual <= histmean - histsd | annual >= histmean + histsd

* Separate dummies for too much and too little rain relative to historical mean
g shockhigh =  annual >= histmean + histsd
g shocklow =  annual < histmean -  histsd


* continuous: percentage of historical mean
g shockperc = 100*annual/histmean

* continuous: percent deviation from historical mean
g shockctsdev = 100*abs(1 - annual/histmean)


* seema jayachandran's shock variable: 1 if below 20 percentile, 0 if between 20-80, and -1 otherwise
* Note: positive value means a negative rainfall shock
bys code91: egen p20 = pctile(annual), p(20) 
bys code91: egen p80 = pctile(annual), p(80) 

g shockpctile = 0
replace shockpctile = 1 if annual < p20
replace shockpctile = -1 if annual > p80

* continuous: negative deviation from historical mean, normalized by district historical sd of annual rain
* high values means a negative rainfall shock

ge shocknorm = -((annual-histmean)/histsd)

compress
sort code91 year
saveold "$rain\district_rain.dta", replace

reshape wide   jan feb mar apr may jun jul aug sept oct nov dec annual histmean shock30 shock50 histsd shockdev shockhigh shocklow shockperc shockctsdev shockpctile shocknorm, i(code91) j(year)
saveold "$rain\district_rain_wide_code91.dta", replace





